In [1]:
# Relative path to the repository checkout (notebook lives two levels down).
repo_pth = '../../'
#resource_pth = '../../opt/rfcx-data/'
# Shared resources directory holding events2.tsv and the wav/ audio folder.
resource_pth = '../../../resources/'
In [2]:
import os
import sys
from datetime import datetime
import numpy as np
import pandas
import seaborn
from matplotlib import pyplot as plt
import sqlite3
import pickle
# Make the project's domain modules and the notebook display helpers
# importable without installing them as packages.
add_paths = [repo_pth+'rfcx-worker-analysis/modules/domain_modules', repo_pth+'notebook-display']
for p in add_paths:
    if p not in sys.path:
        sys.path.append(p)
import load_sound
import spectral_analysis
import fingerprinting
import sound_classification
from IPython.html.widgets import interactive, Checkbox, interact
from IPython.display import display, HTML
from IPython.html import widgets
from IPython.core.display import clear_output
import nbio
In [8]:
# Pick up any edits made to the project modules since first import
# (Python 2 builtin reload; handy while iterating on the library code).
reload(load_sound)
reload(spectral_analysis)
reload(fingerprinting)
reload(sound_classification)
reload(nbio)
# Convenience aliases for the most-used entry points.
show = nbio.show
# NOTE(review): this alias is immediately shadowed by the `def read_sound`
# below in the same cell; it is dead unless that def is removed.
read_sound = load_sound.read_sound
write_sound = load_sound.write_sound
Sound = load_sound.Sound
Spectrum = spectral_analysis.Spectrum
Profile = fingerprinting.Profile
SoundClassifier = sound_classification.SoundClassifier
def play(snd):
    """Render an inline audio player for a Sound object's raw samples."""
    samples, rate = snd.data, snd.samplerate
    nbio.play(samples, rate)
def read_sound(fp):
    """Load a recording, attaching guardian id and ISO start time parsed
    from the file name.

    Expected file-name form: <guardian_id>-YYYY-MM-DDTHH-MM-SS.<ext>
    The result of load_sound.read_sound(fp, meta_data) is returned, where
    meta_data = {'guardian_id': ..., 'start_time': 'YYYY-MM-DDTHH:MM:SS'}.
    """
    fn = os.path.basename(fp)          # robust to any path separators
    name = os.path.splitext(fn)[0]     # drop the extension, keep dots-free stem
    gdate, time = name.split('T', 1)
    time = time.replace('-', ':')      # HH-MM-SS -> HH:MM:SS
    # rsplit so a guardian id that itself contains '-' still parses:
    # only the last three dash-separated fields are the date.
    gid, y, m, d = gdate.rsplit('-', 3)
    date = '-'.join([y, m, d])
    dt = 'T'.join([date, time])
    meta_data = {'guardian_id': gid, 'start_time': dt}
    return load_sound.read_sound(fp, meta_data)
In [9]:
event_fn = resource_pth+'events2.tsv'
data_pth = resource_pth+'wav/'
in_dir = sorted(os.listdir(data_pth))
print '%s files found in %s' % (len(set(in_dir)), data_pth)
In [10]:
# Load the tab-separated event annotations and keep only rows whose audio
# file is actually present on disk (has_file == True).
# Uses the public pandas.read_csv entry point; pandas.io.parsers.read_csv
# is an internal module path and not part of the stable API.
df = pandas.read_csv(
    event_fn,
    sep='\t',
    parse_dates=['time'],
    infer_datetime_format=True,
).groupby('has_file').get_group(True)
In [11]:
fips = []
tot = len(df)
for i,ev in df.iterrows():
#if i>1:break
clear_output()
print "%i/%i"%(i,tot)
sys.stdout.flush()
snd = read_sound(data_pth+ev.location).crop(ev.start_seek,ev.stop_seek)
spc = Spectrum(snd)
pro = Profile(spc)
scl = SoundClassifier(pro)
scl.run_profile()
scl.classify_interest_areas()
mx_dur = 0
best_fip = None
for fip in pro.interest_fingerprints:
strt, stop = fip['time_interval']
dur = stop - strt
if dur>mx_dur: mx_dur= dur; best_fip= fip
fips.append(best_fip)
pickle.dump(fips, open('current_classifier_fips.pkl', 'w'))
In [8]:
fips = pickle.load(open('current_classifier_fips.pkl'))
In [12]:
# Attach one best fingerprint per event row. Requires len(fips) == len(df)
# and that fips is ordered like df's rows (it was built from df.iterrows()).
df['fips'] = fips
print len(fips)
In [13]:
print fp
In [15]:
# Grab the first event that has a fingerprint, remember its local file path
# and seek window, then re-run the analysis pipeline on that single event.
for i,ev in df.iterrows():
    fip = ev['fips']
    if fip is not None:
        # NOTE(review): `if True or ...` is a disabled volume-power filter
        # left over from debugging — the condition always passes.
        if True or fip['volume_power'].max()>1000000:
            print fip['event_timestamp']
            # NOTE(review): the fingerprint's interval is unpacked and then
            # immediately overwritten by the event's annotated seek window
            # (dead stores kept for easy switching while debugging).
            strt, stop = fip['time_interval']
            strt = ev.start_seek
            stop = ev.stop_seek
            fp = fip['meta_data']['filepath']
            fn = fp.split('/')[-1]
            # remap the stored path onto the local data directory
            fp = data_pth+fn
            break
# Re-analyse the selected event end-to-end and display the results.
snd = read_sound(fp).crop(strt,stop)
print np.max(snd.data)   # peak sample before muting saturated regions
snd.mute_saturated()
print np.max(snd.data)   # peak sample after muting
play(snd)
spc = Spectrum(snd)
pro = Profile(spc)
scl = SoundClassifier(pro)
scl.run_profile()
scl.classify_interest_areas()
nbio.show(pro.harmonic_power)
nbio.show(pro.profile_plot(start_freq=0, end_freq=2000))
In [16]:
# Prototype of a harmonic-power measure: for candidate fundamental
# frequencies between 15 and 100 Hz, sum spectrogram magnitude across the
# first 10 harmonics, pick the best fundamental per time frame, and compare
# harmonic-peak energy to the band-wide average. Operates on `spc` produced
# by the cell above; compared against pro.harmonic_power at the end.
from scipy.interpolate import interp1d
from scipy.ndimage.filters import convolve1d
from scipy.signal import hann
from spectral_analysis import Bbox
strt_f, stop_f = 15.,100.  # candidate fundamental range (Hz)
bbox = Bbox(spc, start_freq=strt_f, end_freq=stop_f)
freq_ix_slice = bbox.ix()[0]  # row slice of the spectrogram covering the band
a = spc.abs_arr  # magnitude spectrogram; assumes (freq, time) layout — TODO confirm
intvls = np.linspace(strt_f, stop_f, 100)  # 100 candidate fundamentals
hrmncs = np.arange(10)+1  # harmonic multipliers 1..10
X, Y = np.meshgrid(intvls, hrmncs)
b = X * Y  # (10, 100): frequency of each harmonic of each candidate
arr = np.arange(len(spc.freqs))
# map frequency in Hz -> (fractional) row index into the spectrogram
freq2ix = interp1d(spc.freqs, arr,
    kind='linear', bounds_error=False)
ix = freq2ix(b).astype(int)
z = a[ix].sum(0)  # per candidate: magnitudes summed across its harmonics
win = hann(7)
z = convolve1d(z, win, axis=1)  # smooth scores along the time axis
x = np.argmax(z,0)  # index of the best candidate fundamental per frame
h = intvls[x]  # winning fundamental (Hz) per frame
peaks_ix = np.outer(hrmncs,freq2ix(h))  # harmonic row indices per frame
peaks_mag = np.zeros_like(peaks_ix)
peaks_ix = peaks_ix.astype(int)
for i in range(peaks_ix.shape[1]):
    ix = peaks_ix[:,i]
    peaks_mag[:,i] = a[ix,i]  # magnitude at each harmonic for this frame
peaks_avg = np.average(peaks_mag, 0)  # mean magnitude over the harmonics
overall_avg = np.average(a[freq_ix_slice,:], axis=0)  # band-wide mean
h_power = peaks_avg/overall_avg  # > 1 where energy concentrates at harmonics
print 'peaks ix'
nbio.show(peaks_ix, bound=True)
print 'peaks_mag'
nbio.show(peaks_mag, bound=True)
print 'peaks_avg'
nbio.show(peaks_avg)
print 'overall_avg'
nbio.show(overall_avg)
print 'h_power'
nbio.show(h_power)
print 'h'
nbio.show(h, bound=True)
nbio.show(pro.harmonic_power)  # library implementation, for comparison
In [249]:
valid_df = df.groupby('valid').get_group(1)
In [250]:
for i, ev in valid_df.iterrows():
if ev.fips:
print ev['type'], ev.fips.get('classification')
else:
print ev['type'], '*****'
In [219]:
vehic_fips = []
for fip in fips:
if fip is not None and fip['classification'] not in ['GSM_Noise','unknown_sound']:
strt, stp = fip['time_interval']
if True: #stp-strt>3:
vehic_fips.append(fip)
print len(vehic_fips)
In [220]:
for fip in vehic_fips[:20]:
snd = read_sound(fip['meta_data']['filepath']).crop(*fip['time_interval'])
md = fip['meta_data']
print fip['classification'], md['guardian_id'], md['start_time'], fip['time_interval'][0]
play(snd)
In [221]:
# Basenames of every recording that produced a candidate vehicle hit.
found_locs = [fip['meta_data']['filepath'].split('/')[-1]
              for fip in vehic_fips]
In [240]:
for x in [(),(1),(1,),list(1,2)]:
print type(x)
In [222]:
valid_locs = [ev[1].location for ev in valid_df.iterrows()]
print found_locs[0]
print valid_locs[0]
In [223]:
def find_vol(loc):
    """Look up the annotated seek window for a recording.

    Scans the module-level valid_df and returns (location, start_seek,
    stop_seek) for the first row whose location equals `loc`. Returns None
    when no row matches — the original fell off the end implicitly, which
    makes callers that unpack the result fail with an opaque TypeError;
    only pass locations known to be in valid_df.
    """
    for i, ev in valid_df.iterrows():
        if ev.location == loc:
            return loc, ev.start_seek, ev.stop_seek
    return None  # miss case made explicit


loc, start, stop = find_vol(valid_locs[0])
In [224]:
false_poss = set(found_locs).difference(valid_locs)
print len(false_poss)
In [225]:
false_negs = set(valid_locs).difference(found_locs)
print len(false_negs)
for loc in list(false_negs)[:10]:
loc, start, stop = find_vol(loc)
play(read_sound(data_pth+loc).crop(start,stop))
In [169]:
# Peak sample value (from file metadata) for every recording in the corpus.
vals = [read_sound(data_pth + loc).meta_data['max_val']
        for loc in in_dir]
In [173]:
np.histogram(vals)
Out[173]:
In [ ]: